{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n",
      "Sun Dec  3 07:29:51 2017       \n",
      "+-----------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 384.90                 Driver Version: 384.90                    |\n",
      "|-------------------------------+----------------------+----------------------+\n",
      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
      "|===============================+======================+======================|\n",
      "|   0  GeForce GTX 108...  Off  | 00000000:02:00.0  On |                  N/A |\n",
      "| 26%   42C    P5    24W / 250W |   1576MiB / 11172MiB |     12%      Default |\n",
      "+-------------------------------+----------------------+----------------------+\n",
      "|   1  GeForce GTX 108...  Off  | 00000000:81:00.0 Off |                  N/A |\n",
      "| 23%   37C    P8    16W / 250W |    242MiB / 11172MiB |      0%      Default |\n",
      "+-------------------------------+----------------------+----------------------+\n",
      "                                                                               \n",
      "+-----------------------------------------------------------------------------+\n",
      "| Processes:                                                       GPU Memory |\n",
      "|  GPU       PID   Type   Process name                             Usage      |\n",
      "|=============================================================================|\n",
      "+-----------------------------------------------------------------------------+\n"
     ]
    }
   ],
   "source": [
    "%pylab inline\n",
    "import numpy as np\n",
    "import time\n",
    "import sys\n",
    "import logging\n",
    "from sklearn.decomposition import TruncatedSVD as tsvd\n",
    "from h2o4gpu.solvers import TruncatedSVDH2O\n",
    "!nvidia-smi\n",
    "import sys\n",
    "import py3nvml ## pip install -e git+https://github.com/fbcotter/py3nvml#egg=py3nvml\n",
    "import subprocess\n",
    "from IPython.display import Markdown, display\n",
    "def printmd(string):\n",
    "    \"\"\"Render ``string`` as Markdown in the notebook cell output.\"\"\"\n",
    "    rendered = Markdown(string)\n",
    "    display(rendered)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2.03 s, sys: 696 ms, total: 2.73 s\n",
      "Wall time: 2.73 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# Build the dense benchmark input: an m x n matrix of random values.\n",
    "# The global NumPy RNG is seeded first so the same matrix is generated on every run.\n",
    "np.random.seed(1234)\n",
    "m, n = 200000, 1000\n",
    "X = np.random.rand(m, n)\n",
    "\n",
    "# Number of components requested from each truncated SVD fitted below.\n",
    "k = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**Total time for h2o4gpu arpack tsvd is 6.568532228469849**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "h2o4gpu tsvd Singular Values\n",
      "[ 7072.42822266   138.08740234   138.0827179    137.95184326   137.91082764\n",
      "   137.89772034   137.77865601   137.74250793   137.65570068   137.64558411]\n",
      "h2o4gpu tsvd Components (V^T)\n",
      "[[ 0.03160822  0.03163449  0.03162544 ...,  0.03160141  0.03167794\n",
      "   0.0315132 ]\n",
      " [-0.03652671  0.0144819   0.0087755  ...,  0.05670537 -0.0237855\n",
      "   0.01040668]\n",
      " [ 0.00863585  0.02036315 -0.00762595 ...,  0.01559443  0.02623164\n",
      "   0.10060279]\n",
      " ..., \n",
      " [-0.02592493  0.03186912 -0.02734392 ...,  0.00698451  0.01649839  0.        ]\n",
      " [ 0.00631481  0.04533872 -0.01610234 ..., -0.00025566  0.01935858\n",
      "  -0.06476473]\n",
      " [ 0.01102729 -0.07528458 -0.06134396 ..., -0.00144295  0.01815314  0.        ]]\n",
      "h2o4gpu tsvd Explained Variance\n",
      "[ 0.08016483  0.08030165  0.08033707  0.08026209  0.08039588  0.08033238\n",
      "  0.08033585  0.08033044  0.08032776  0.08026112]\n",
      "h2o4gpu tsvd Explained Variance Ratio\n",
      "[ 0.00097478  0.00097644  0.00097687  0.00097596  0.00097759  0.00097682\n",
      "  0.00097686  0.00097679  0.00097676  0.00097595]\n",
      "CPU times: user 2.15 s, sys: 3.9 s, total: 6.05 s\n",
      "Wall time: 6.57 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Benchmark: fit h2o4gpu's TruncatedSVDH2O on X with k components, then show the\n",
    "# elapsed time and the fitted attributes. Timing covers construction plus fit().\n",
    "# NOTE(review): the label below says \"arpack\", but no `algorithm` argument is\n",
    "# passed here, so the estimator's default solver is used -- confirm the label.\n",
    "# perf_counter() is a monotonic clock, so the measured duration cannot be\n",
    "# distorted by system clock adjustments the way a time.time() difference can.\n",
    "start_time = time.perf_counter()\n",
    "h2o4gpu_tsvd = TruncatedSVDH2O(n_components=k)\n",
    "h2o4gpu_tsvd.fit(X)\n",
    "elapsed = time.perf_counter() - start_time\n",
    "printmd(\"**Total time for h2o4gpu arpack tsvd is \" + str(elapsed) + \"**\")\n",
    "\n",
    "# Fitted attributes, printed for comparison with the scikit-learn runs.\n",
    "print(\"h2o4gpu tsvd Singular Values\")\n",
    "print(h2o4gpu_tsvd.singular_values_)\n",
    "print(\"h2o4gpu tsvd Components (V^T)\")\n",
    "print(h2o4gpu_tsvd.components_)\n",
    "print(\"h2o4gpu tsvd Explained Variance\")\n",
    "print(h2o4gpu_tsvd.explained_variance_)\n",
    "print(\"h2o4gpu tsvd Explained Variance Ratio\")\n",
    "print(h2o4gpu_tsvd.explained_variance_ratio_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**Total time for arpack scikit-learn tsvd is 41.16011619567871**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tsvd Singular Values\n",
      "[ 7072.4289909    138.14048601   138.09703705   137.98470993   137.91346777\n",
      "   137.86160169   137.81041514   137.76470811   137.71770837   137.65846051]\n",
      "tsvd Components (V^T)\n",
      "[[ 0.03160822  0.03163449  0.03162544 ...,  0.0316014   0.031678\n",
      "   0.03151319]\n",
      " [ 0.02064035 -0.02891545  0.02041258 ..., -0.03163838 -0.01079971\n",
      "  -0.05860967]\n",
      " [-0.02544365  0.00285468  0.02429651 ...,  0.04693024 -0.0375039\n",
      "  -0.04562807]\n",
      " ..., \n",
      " [-0.06114168  0.02761409 -0.02933754 ...,  0.02465429  0.03333408\n",
      "   0.01499451]\n",
      " [-0.01115856  0.03547422 -0.01811966 ..., -0.00531147 -0.0035131\n",
      "  -0.09533024]\n",
      " [ 0.04268635 -0.04544683 -0.01477452 ..., -0.01384292 -0.01668161\n",
      "   0.01887234]]\n",
      "tsvd Explained Variance\n",
      "[ 0.08306231  0.09541397  0.09535396  0.0951989   0.09510062  0.09502911\n",
      "  0.09495855  0.09489557  0.09483084  0.09474926]\n",
      "tsvd Explained Variance Ratio\n",
      "[ 0.00099672  0.00114494  0.00114422  0.00114236  0.00114118  0.00114032\n",
      "  0.00113947  0.00113872  0.00113794  0.00113696]\n",
      "CPU times: user 15min 32s, sys: 4min 23s, total: 19min 56s\n",
      "Wall time: 41.2 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Baseline: scikit-learn TruncatedSVD with the ARPACK solver, fitted on X with\n",
    "# k components. Timing covers estimator construction plus fit().\n",
    "# perf_counter() is a monotonic clock, so the measured duration cannot be\n",
    "# distorted by system clock adjustments the way a time.time() difference can.\n",
    "start_time = time.perf_counter()\n",
    "model = tsvd(n_components=k, algorithm=\"arpack\")\n",
    "model.fit(X)\n",
    "elapsed = time.perf_counter() - start_time\n",
    "printmd(\"**Total time for arpack scikit-learn tsvd is \" + str(elapsed) + \"**\")\n",
    "\n",
    "# Fitted attributes, printed for comparison with the other runs.\n",
    "print(\"tsvd Singular Values\")\n",
    "print(model.singular_values_)\n",
    "print(\"tsvd Components (V^T)\")\n",
    "print(model.components_)\n",
    "print(\"tsvd Explained Variance\")\n",
    "print(model.explained_variance_)\n",
    "print(\"tsvd Explained Variance Ratio\")\n",
    "print(model.explained_variance_ratio_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "**Total time for randomized scikit-learn tsvd is 13.020515203475952**"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tsvd Singular Values\n",
      "[ 7072.4289909    133.37031017   133.22144366   133.05844997   132.90668912\n",
      "   132.83067352   132.75548619   132.67283248   132.49428064   132.39504544]\n",
      "tsvd Components (V^T)\n",
      "[[ 0.03160822  0.03163449  0.03162544 ...,  0.0316014   0.031678\n",
      "   0.03151319]\n",
      " [ 0.01463285  0.00081001 -0.0430451  ..., -0.01419612  0.0010795\n",
      "  -0.0302737 ]\n",
      " [-0.03593592 -0.00264458 -0.06579635 ..., -0.03984711  0.02088399\n",
      "  -0.03138619]\n",
      " ..., \n",
      " [-0.01864775 -0.0305044  -0.00625413 ..., -0.03590451 -0.03470779\n",
      "   0.01215434]\n",
      " [-0.02234073  0.03082768  0.00760221 ...,  0.00105271  0.02961436\n",
      "  -0.02676475]\n",
      " [ 0.04186557 -0.0226522   0.00636629 ...,  0.01948014 -0.01271316\n",
      "   0.02798382]]\n",
      "tsvd Explained Variance\n",
      "[ 0.08306231  0.0889382   0.08873977  0.08852276  0.08832094  0.08821994\n",
      "  0.0881201   0.0880104   0.08777367  0.08764224]\n",
      "tsvd Explained Variance Ratio\n",
      "[ 0.00099672  0.00106723  0.00106485  0.00106224  0.00105982  0.00105861\n",
      "  0.00105741  0.0010561   0.00105326  0.00105168]\n",
      "CPU times: user 13 s, sys: 27.8 s, total: 40.7 s\n",
      "Wall time: 13 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Baseline: scikit-learn TruncatedSVD with the randomized solver, fitted on X\n",
    "# with k components. Timing covers estimator construction plus fit().\n",
    "# perf_counter() is a monotonic clock, so the measured duration cannot be\n",
    "# distorted by system clock adjustments the way a time.time() difference can.\n",
    "start_time = time.perf_counter()\n",
    "# An explicit random_state makes this cell reproducible when re-run on its own;\n",
    "# without it the result depends on the RNG state left behind by earlier cells.\n",
    "model = tsvd(n_components=k, algorithm=\"randomized\", random_state=1234)\n",
    "model.fit(X)\n",
    "elapsed = time.perf_counter() - start_time\n",
    "printmd(\"**Total time for randomized scikit-learn tsvd is \" + str(elapsed) + \"**\")\n",
    "\n",
    "# Fitted attributes, printed for comparison with the other runs.\n",
    "print(\"tsvd Singular Values\")\n",
    "print(model.singular_values_)\n",
    "print(\"tsvd Components (V^T)\")\n",
    "print(model.components_)\n",
    "print(\"tsvd Explained Variance\")\n",
    "print(model.explained_variance_)\n",
    "print(\"tsvd Explained Variance Ratio\")\n",
    "print(model.explained_variance_ratio_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
